* Close any log left open by an earlier run (no error if none is open),
* then start a fresh log for this do-file, overwriting the previous one.
capture log close
log using "$logs/cr-susprates.log", replace

** Based off of cr-susprates-v3

**************************************************
*Analysis of LEA School Disc Outcomes
*
*This do file will be used for all analysis tasks.  
*Contents:
*1. Creating suspension rates
*2. Generating summary statistics and histograms
*3. Merging in county identifiers
**************************************************


* Load the LEA-by-school-year panel.
use "$data/1216_lealevel_panel.dta", clear

* Report how many distinct leaid-schendyr combinations there are; the panel is
* expected to be unique by LEA and school year.
* NOTE(review): unique is not an official Stata command (presumably the SSC
* package) - confirm it is installed before running.
unique leaid schendyr // unique by lea and sch yr

/*First, I will run some descriptive statistics on the suspension fields.
Dave has requested the following:
  - Suspension rate for each subgroup in the data set (black male SPED, white
    female no SPED, etc.)
  - Suspension rates by race subgroup (all black students) and by race-by-gender
    subgroup (all black males)

Before I can generate these rates I need total enrollment for these subgroups.
The data have no total enrollment by race, so those totals are built here.*/

* Total enrollment by race = male + female enrollment.
* The missing option keeps the total missing (instead of 0) when both
* components are missing; a single missing component is treated as 0.
foreach x in bl wh {
	egen enr_`x'_all = rowtotal(enr_`x'_m enr_`x'_f), missing
	la var enr_`x'_all "total: enr_`x'_m enr_`x'_f"
}

* Quick look at every enrollment variable
sum enr_*

* Spot-check the ISS components and the new enrollment totals for one LEA
list leaid schendyr discwodis_iss*  discwdis_iss* enr_bl_all enr_wh_all if leaid=="1201770"

**************************************************************************
*******************Suspension rates by race, race-by-gender subgroups ****
**************************************************************************

// For 2014 and 2016, preschool students are included in the enrollment totals and
// have their own suspension count variables; 2012 has no separate preschool
// suspension variables (preschool suspensions are out-of-school only).

// ISS (the ISS variable counts "one or more" suspensions, whereas OOS is split into exactly 1 vs. more than 1)
// In-school suspension (ISS) counts by race and by race-by-gender.
// The local macro type is also read by the rate and summary code that follows.
local type iss
foreach x in bl wh {
	// Race total: male + female, from both the discwodis and discwdis idea counts
	// (presumably students without / with disabilities - confirm against codebook).
	// The missing option leaves the total missing, not 0, when all four
	// components are missing.
	egen `type'_`x'_all = rowtotal(discwodis_`type'_`x'_m discwodis_`type'_`x'_f ///
		discwdis_`type'_idea_`x'_m discwdis_`type'_idea_`x'_f), missing
	la var `type'_`x'_all "total: discwodis_`type'_`x'_m discwodis_`type'_`x'_f discwdis_`type'_idea_`x'_m discwdis_`type'_idea_`x'_f"

	// Race-by-gender totals, same missing-value rule
	foreach g in m f {
		egen `type'_`x'_`g' = rowtotal(discwodis_`type'_`x'_`g' discwdis_`type'_idea_`x'_`g'), missing
		la var `type'_`x'_`g' "total: discwodis_`type'_`x'_`g'  discwdis_`type'_idea_`x'_`g' "
	}
} // end bl wh loop

* ISS rates (prefix ps) = suspension count / enrollment, for each race and
* race-by-gender subgroup. Relies on the local macro type set earlier (iss).
* Stata returns missing when the enrollment denominator is 0 or missing.
foreach i in bl wh {

	gen ps_`type'_`i'_all = `type'_`i'_all / enr_`i'_all
	lab var ps_`type'_`i'_all "`i' `type' all /enr`i'"

	foreach g in m f {
		gen ps_`type'_`i'_`g' = `type'_`i'_`g' / enr_`i'_`g'
		// label previously ended in a stray apostrophe
		lab var ps_`type'_`i'_`g' "`i' `type' `g' /enr_`i'_`g'"
	} // end m f

} // end bl wh

* Inspect the ISS rates just created. The six rate variables are contiguous in
* the dataset, so a first-last variable range picks up all of them.
local first_rate ps_`type'_bl_all
local last_rate  ps_`type'_wh_f

summarize `first_rate'-`last_rate'
summarize `first_rate'-`last_rate', detail

// Share of nonmissing observations with a rate above 1 (only a tiny fraction)
foreach rate of varlist `first_rate'-`last_rate' {
	display "`rate'"
	quietly count if !missing(`rate')
	local n_valid = r(N)
	count if `rate' > 1 & !missing(`rate')
	display r(N)/`n_valid'
}

// Summary stats with and without the rates above 1 (hardly any difference)
foreach rate of varlist `first_rate'-`last_rate' {
	display "`rate'"
	summarize `rate'
	summarize `rate' if `rate' <= 1
}

// 2012 has no preschool suspension variables, so they are all missing in that
// year; the rowtotals below simply treat them as 0s
bysort schendyr: summarize ps*

// Out-of-school suspension counts, rates and checks, built separately for the
// two suspension types singoos and multoos.
foreach type in singoos multoos {

	// Totals by race and by race-by-gender. Each total adds the discwodis,
	// discwdis idea and psdisc (preschool) counts. The missing option leaves a
	// total missing, rather than 0, only when every component is missing; a
	// component that is missing on its own (e.g. psdisc in 2012) counts as 0.
	foreach x in bl wh {

		egen `type'_`x'_all = rowtotal(discwodis_`type'_`x'_m discwodis_`type'_`x'_f ///
			discwdis_`type'_idea_`x'_m discwdis_`type'_idea_`x'_f ///
			psdisc_`type'_`x'_m psdisc_`type'_`x'_f), missing
		la var `type'_`x'_all "total: discwodis_`type'_`x'_m discwodis_`type'_`x'_f discwdis_`type'_idea_`x'_m discwdis_`type'_idea_`x'_f 	psdisc_`type'_`x'_m psdisc_`type'_`x'_f"

		foreach g in m f {
			egen `type'_`x'_`g' = rowtotal(discwodis_`type'_`x'_`g' discwdis_`type'_idea_`x'_`g' psdisc_`type'_`x'_`g'), missing
			la var `type'_`x'_`g' "total: discwodis_`type'_`x'_`g'  discwdis_`type'_idea_`x'_`g' psdisc_`type'_`x'_`g'"
		} // end m f
	} // end bl wh

	// Rates: ps is the prefix for percent students (count / enrollment).
	// Some LEAs have rates larger than 1, which is weird; they are counted below.
	foreach i in bl wh {

		gen ps_`type'_`i'_all = `type'_`i'_all / enr_`i'_all
		lab var ps_`type'_`i'_all "`i' `type' all /enr`i'"

		foreach g in m f {
			gen ps_`type'_`i'_`g' = `type'_`i'_`g' / enr_`i'_`g'
			// label previously ended in a stray apostrophe
			lab var ps_`type'_`i'_`g' "`i' `type' `g' /enr_`i'_`g'"
		} // end m f

	} // end bl wh

	// Look at the six rates for this suspension type
	sum ps_`type'_bl_all-ps_`type'_wh_f
	sum ps_`type'_bl_all-ps_`type'_wh_f, d

	// how many are over 1? only a tiny fraction
	foreach var of varlist ps_`type'_bl_all-ps_`type'_wh_f {
		di "`var'"
		qui count if !mi(`var')
		local N = r(N)
		count if `var'>1 & !mi(`var')
		di r(N)/`N'
	}
	// hardly makes diff for summary stats
	foreach var of varlist ps_`type'_bl_all-ps_`type'_wh_f {
		di "`var'"
		su `var'
		su `var' if `var'<=1
	}
} // end type loop



// combine singoos and multoos

*Now let me generate the total suspension variables by race subgroups 
//Generate 
// Combined out-of-school suspensions = singoos + multoos, by race and by
// race-by-gender. The missing option leaves the total missing, not 0, only
// when both components are missing.
foreach x in bl wh {
	egen oos_`x'_all = rowtotal(singoos_`x'_all multoos_`x'_all), missing
	la var oos_`x'_all "total: singoos_`x'_all multoos_`x'_all"

	foreach g in m f {
		egen oos_`x'_`g' = rowtotal(singoos_`x'_`g' multoos_`x'_`g'), missing
		la var oos_`x'_`g' "total: singoos_`x'_`g'  multoos_`x'_`g' "
	}
}



*Let me generate rates. I will add ps as the prefix for percent students
*Let me generate the rates for each subgroup
// Combined OOS rates = OOS count / enrollment, by race and by race-by-gender.
// Stata returns missing when the enrollment denominator is 0 or missing.
foreach i in bl wh {

	gen ps_oos_`i'_all = oos_`i'_all / enr_`i'_all
	lab var ps_oos_`i'_all "`i' oos all /enr`i'"

	foreach g in m f {
		gen ps_oos_`i'_`g' = oos_`i'_`g' / enr_`i'_`g'
		// label previously ended in a stray apostrophe
		lab var ps_oos_`i'_`g' "`i' oos `g' /enr_`i'_`g'"
	}

}

* Inspect the combined OOS rates (six contiguous variables)
summarize ps_oos_bl_all-ps_oos_wh_f
summarize ps_oos_bl_all-ps_oos_wh_f, detail

// Share of nonmissing observations with a rate above 1 (only a tiny fraction)
foreach rate of varlist ps_oos_bl_all-ps_oos_wh_f {
	display "`rate'"
	quietly count if !missing(`rate')
	local n_valid = r(N)
	count if `rate' > 1 & !missing(`rate')
	display r(N)/`n_valid'
}

// Summary stats with and without the rates above 1 (hardly any difference)
foreach rate of varlist ps_oos_bl_all-ps_oos_wh_f {
	display "`rate'"
	summarize `rate'
	summarize `rate' if `rate' <= 1
}



* Flag LEA-year rows where at least one ps_ rate exceeds 1 (more than 100 percent).
* Rows start at 0 and are switched to 1 by any nonmissing rate above 1.
generate any_greater100 = 0
foreach rate of varlist ps_* {
	replace any_greater100 = 1 if `rate' > 1 & `rate' != .
}
label variable any_greater100 "any of ps_* rates are greater than 100"
tabulate any_greater100, missing
* 551 rows have at least one race or race-by-gender rate over 100 percent.
* That is 1.1 percent of the LEA-by-year observations.

tabulate schendyr any_greater100, missing

/*
 
    End of |
    school |
      year | any of ps_* rates are
(2011-12=2 |   greater than 100
      012) |         0          1 |     Total
-----------+----------------------+----------
      2012 |    16,262        213 |    16,475 
      2014 |    16,558        190 |    16,748 
      2016 |    17,187        148 |    17,335 
-----------+----------------------+----------
     Total |    50,007        551 |    50,558 



*/

* Number of distinct LEAs that are flagged in at least one year
unique leaid if any_greater100 == 1

* LEA-level mean of the flag: above 0 whenever the LEA is flagged in any year
bysort leaid: egen anygreat = mean(any_greater100)

* List every year of each LEA that is ever flagged
sort leaid schendyr
list leaid schendyr any_greater100 anygreat if anygreat > 0


* Keep the newly generated variables: shrink storage types, then write the
* cleaned LEA-level panel.
compress
save "$data/1216_lealevel_panel_clean.dta", replace

	// Create county level data: keep only the identifiers, race totals and flag
	keep leaid schendyr enr_bl_all enr_wh_all iss_bl_all iss_wh_all oos_bl_all oos_wh_all any_greater100

	// leaid is a string here. Convert a scratch copy first (force turns
	// non-numeric ids into missing) so the unconvertible ids can be inspected
	// and dropped before leaid itself is converted.
	destring leaid, generate(leaid_chk) force
	list leaid if missing(leaid_chk)
	summarize enr* if missing(leaid_chk)
	drop if missing(leaid_chk)
	drop leaid_chk
	destring leaid, replace

	// Attach county ids from the crosswalk; keep master-only and matched rows
	merge m:1 leaid using "$data/county_leaid_xwalk", keep(master match)
	egen tag = tag(leaid)
	// Match rate with one row per LEA
	tabulate _merge if tag // 97%
	summarize enr_bl_all enr_wh_all if _merge == 1 // V. small districts
	// Only LEAs found in the crosswalk continue to the county file
	keep if _merge == 3
	drop _merge tag
	
		// Sanity check before collapsing: how many LEA-year rows have a race
		// total of suspensions larger than enrollment, and how big are they.
		// Stata treats missing as larger than any number, so diff > 1 alone
		// would also select rows where diff is missing (zero or missing
		// enrollment, or missing counts); those rows are excluded explicitly.
		foreach var in iss oos {
			foreach r in bl wh {
				gen diff = `var'_`r'_all / enr_`r'_all
				tab diff if diff > 1 & !mi(diff)
				sum enr_`r'_all if diff > 1 & !mi(diff)
				drop diff
				}
				} // v small proportions
	 
	// Collapse to county level
	// NOTE(review): the collapse is by countyid only, so the sums pool every
	// school year in the panel, and collapse (sum) counts missing values as 0
	// (an all-missing county gets 0, not missing). Confirm both are intended.
	collapse (sum) enr_bl_all enr_wh_all iss_bl_all iss_wh_all oos_bl_all oos_wh_all, by(countyid)
save "$data/1216_countylevel_clean.dta", replace		

log close
